# -*- coding: utf-8 -*-
# Copyright 2023 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the 'License');
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an 'AS IS' BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

'''A utility collecting ci_test.yml workflow failure logs.

Usage:

  python collect_ci_test_logs.py --token ${github_token} --folder ${folder}

'''

import github
import argparse
import json
import re
import logging
import os


# Repository coordinates; main() passes both values to github.GitHub().
REPO_OWNER = 'firebase'
REPO_NAME = 'firebase-android-sdk'

# Job names that main() skips while walking job_summary.json.
EXCLUDE_JOB_LIST = [
  'Determine changed modules',
  'Unit Tests (matrix)',
  'Publish Tests Results',
  'Unit Test Results',
  'Instrumentation Tests',
  'Unit Tests',
]

def main():
  '''Write one failure log file per failing job listed in job_summary.json.

  Reads the token and the working folder from the command line, loads
  `job_summary.json` from that folder and, for every job that is not in
  EXCLUDE_JOB_LIST and whose `failure_rate` is above zero, writes
  `<folder>/<job_name>.log`. Each log file contains:

    * the job's failure rate and total/success/failure counts,
    * the `html_url` of every entry in the job's `failure_jobs` list,
    * for every entry whose `gh.job_logs(...)` result is truthy, the name of
      each task reported by an "Execution failed for task" line, followed by
      the log text from the first `Task :<name>` up to `Task :<name> FAILED`.

  The same summary and URLs are also emitted through `logging.info`.

  Raises:
    SystemExit: with exit code 1 when the folder does not exist.
  '''
  logging.getLogger().setLevel(logging.INFO)

  args = parse_cmdline_args()
  gh = github.GitHub(REPO_OWNER, REPO_NAME)

  token = args.token

  file_folder = args.folder
  if not os.path.exists(file_folder):
    logging.error(f'{file_folder} doesn\'t exist')
    # The bare `exit` name is only injected by the `site` module; raising
    # SystemExit directly has the same effect and is always available.
    raise SystemExit(1)

  # Use a context manager so the summary file handle is released right after
  # parsing instead of being left to the garbage collector.
  with open(os.path.join(file_folder, 'job_summary.json')) as summary_file:
    job_summary = json.load(summary_file)

  for job_name, job in job_summary.items():
    if job_name in EXCLUDE_JOB_LIST:
      continue
    # Only failing jobs get a log file. No file is opened (or closed) for the
    # others, which previously triggered a NameError / stale close().
    if not job['failure_rate'] > 0:
      continue

    failure_rate = job['failure_rate']
    total_count = job['total_count']
    success_count = job['success_count']
    failure_count = job['failure_count']

    # Built once and shared by the log file and the console output.
    summary = (
      f'{job_name}:\nFailure rate:{failure_rate:.2%} \n'
      f'Total count: {total_count} (success: {success_count}, failure: {failure_count})\n'
      'Failed jobs:'
    )

    log_file_path = os.path.join(file_folder, f'{job_name}.log')
    # `with` guarantees the file is closed even if fetching or parsing a job
    # log raises. UTF-8 is requested explicitly so the output does not depend
    # on the platform default encoding (job logs presumably may contain
    # non-ASCII text; confirm against gh.job_logs).
    with open(log_file_path, 'w', encoding='utf-8') as file_log:
      file_log.write('\n' + summary)
      logging.info('\n\n' + summary)

      for failure_job in job['failure_jobs']:
        file_log.write('\n\n' + failure_job['html_url'])
        logging.info(failure_job['html_url'])
        job_id = failure_job['job_id']
        logs = gh.job_logs(token, job_id)
        if not logs:
          continue

        # using regex to extract failure information
        failed_tasks = re.findall(r"Execution failed for task ':(.*?)'.", logs)
        for failed_task in failed_tasks:
          file_log.write('\n' + failed_task)
          # The task name is literal text taken from the log; escape it so
          # characters such as '.' or '(' are not treated as regex syntax.
          task = re.escape(failed_task)
          pattern = fr'Task :{task}(.*?)Task :{task} FAILED'
          failed_tests = re.search(pattern, logs, re.MULTILINE | re.DOTALL)
          if failed_tests:
            file_log.write('\n' + failed_tests.group())

  logging.info(f'\n\nFinsihed collecting failure logs, log files locates under path: {file_folder}')
      

def parse_cmdline_args():
  '''Parse this script's command-line options.

  Both options are mandatory:
    -t / --token   stored as `token`
    -f / --folder  stored as `folder`

  Returns:
    The argparse.Namespace produced by parsing sys.argv.
  '''
  arg_parser = argparse.ArgumentParser(
    description='Collect certain Github workflow information and calculate failure rate.',
  )
  arg_parser.add_argument(
    '-t', '--token',
    required=True,
    help='GitHub access token',
  )
  arg_parser.add_argument(
    '-f', '--folder',
    required=True,
    help='Folder generated by workflow_information.py. Test logs also locate here.',
  )
  return arg_parser.parse_args()

# Run the collector only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == '__main__':
  main()
